########################################################
## This code trains the baseline model for all years in the sample period.
########################################################

########################################################
## Preparation of the workspace
########################################################

## Start from an empty workspace: drop every object listed by ls() in the
## current environment (attached packages and options are not affected).
rm(list = ls())

## load the required packages
library(haven)
library(caret)
library(randomForest)
library(doParallel)
library(mice)
library(plyr)
library(dplyr)
library(VIM)
library(base)
library(ranger)
library(glmnet)
library(xgboost)

## Record when the run began, so the elapsed time can be checked afterwards
start_time <- Sys.time()

## Project folders, all derived from the project root
main <- "placeholder_main"

dataDirectory <- paste0(main, "/Data")
RDirectory <- paste0(main, "/Programs/Output Generation")

## Source the two helper scripts (parameter tuning and predictions).
## source() evaluates in the global environment by default, so everything
## these files define is visible to the rest of the script -- presumably the
## tuning() and estimating() functions used in the loop below.
invisible(lapply(
  paste0(RDirectory, c("/102_0_caret_parameter_tuning_Function.R",
                       "/103_0_caret_predictions_Function.R")),
  source
))

########################################################
## Define the locals for the loop
########################################################
 
# Model label(s) to run; the label is part of the name of the .dta input file
model_list <- c("Full")

# Outcome variables to be predicted
dependent_variable <- c(
  "emplAft6M_0M_In",
  "emplAft6M_6M_In",
  "emplAft6M_12M_In"
)

# Sample period: one run per year, both end points included
years <- seq.int(from = 1992L, to = 2016L)


########################################################
## Running the loop
########################################################   

# Outcome on which the hyper-parameters are tuned. The tuned parameters are
# reused for every other outcome of the same model and year.
tuning_outcome <- "emplAft6M_0M_In"

for (model in model_list) {

  for (y in years) {

    # Load the dataset once per model-year. Doing this here (rather than inside
    # the outcome loop) guarantees that every outcome of a given year is
    # estimated on that year's data, independent of the order of
    # `dependent_variable`.
    data <- read_dta(
      paste0(dataDirectory, "/002_DataForR_", model, "_", y, ".dta")
    )

    # Run the tuning function on the tuning outcome
    parameters <- tuning(data = data, dependent = tuning_outcome,
                         s_tuning = 0.1, seed = 2111, noisily = TRUE)

    # Store the final parameter grids and the full search grids
    tuning_tag <- paste0(model, "_", tuning_outcome, "_", y, ".csv")
    write.csv(parameters$rfgrid_final,
              file = paste0(dataDirectory, "/102_rfgrid_", tuning_tag),
              row.names = FALSE)
    write.csv(parameters$boostgrid_final,
              file = paste0(dataDirectory, "/102_boostgrid_", tuning_tag),
              row.names = FALSE)
    write.csv(parameters$lassogrid_final,
              file = paste0(dataDirectory, "/102_lassogrid_", tuning_tag),
              row.names = FALSE)
    write.csv(parameters$rfgrid_search,
              file = paste0(dataDirectory, "/102_rfgrid_search_", tuning_tag),
              row.names = FALSE)
    write.csv(parameters$boostgrid_search,
              file = paste0(dataDirectory, "/102_boostgrid_search_", tuning_tag),
              row.names = FALSE)
    write.csv(parameters$lassogrid_search,
              file = paste0(dataDirectory, "/102_lassogrid_search_", tuning_tag),
              row.names = FALSE)

    for (dependent in dependent_variable) {

      # The tuning outcome is estimated with s_tuning = 0.1 / s_training = 0.3;
      # all other outcomes use s_tuning = 0 / s_training = 0.4.
      # NOTE(review): presumably these are sample shares and the tuning share
      # is added to the training share when no tuning is needed -- confirm
      # against estimating().
      tuned_here <- dependent == tuning_outcome

      # Run the prediction function
      results <- estimating(data = data, dependent = dependent,
                            parameters = parameters,
                            s_tuning = if (tuned_here) 0.1 else 0,
                            s_training = if (tuned_here) 0.3 else 0.4,
                            seed = 2111, noisily = TRUE)

      run_tag <- paste0(model, "_", dependent, "_", y)

      # Save models (stored under the object name `models` inside the .rda)
      models <- results$models
      save(models,
           file = paste0(dataDirectory, "/103_Models_", run_tag, ".rda"))

      # Save predictions. row.names is deliberately left at its default so
      # the layout of the CSV stays the same for whatever reads it.
      write.csv(results$output,
                file = paste0(dataDirectory, "/103_predictionsR_", run_tag,
                              ".csv"))
    }
  }
}

# Report how long the whole run took (start_time is set at the top of the
# script; skipped if this section is run on its own).
if (exists("start_time")) {
  message("Total run time: ", format(Sys.time() - start_time))
}
